#!/bin/bash
#
# Launcher for the optimized CCCF crawler.
# Runs the spider with tuned settings intended to address concurrency problems.
#
# The current working directory is appended to PYTHONPATH and the output
# directory is created relative to it.

# Fail fast: abort on command errors, unset variables and failed pipeline
# stages, so a failed setup step (e.g. mkdir) does not go unnoticed.
set -euo pipefail

echo "🚀 启动CCCF优化版爬虫..."

# Make the current directory importable by Python. The ${VAR:+...} form only
# emits the "existing:" prefix when PYTHONPATH is already non-empty, which
# avoids a stray leading ':' (an empty path entry) and is safe under `set -u`.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}$(pwd)"

# Create the output directory (no error if it already exists).
mkdir -p outputs/cccf_optimized

# Run the crawl with the optimized settings, then print a summary.

# Output locations, relative to the current working directory.
output_dir="outputs/cccf_optimized"
feed_file="${output_dir}/cccf_data_$(date +%Y%m%d_%H%M%S).json"
log_file="${output_dir}/spider.log"

# Tuning values used by both the scrapy command line and the summary printed
# at the end, so the two cannot drift apart.
concurrent_requests=16
concurrent_requests_per_domain=8
download_delay=0.1
autothrottle_target_concurrency=8.0

# Per-run setting overrides passed to scrapy via -s NAME=VALUE.
# NOTE(review): FEED_FORMAT / FEED_URI are deprecated in newer Scrapy releases
# in favour of FEEDS (or the -o / -O options) — confirm against the installed
# Scrapy version before migrating.
scrapy_args=(
  -s LOG_LEVEL=INFO
  -s "CONCURRENT_REQUESTS=${concurrent_requests}"
  -s "CONCURRENT_REQUESTS_PER_DOMAIN=${concurrent_requests_per_domain}"
  -s "DOWNLOAD_DELAY=${download_delay}"
  -s RANDOMIZE_DOWNLOAD_DELAY=True
  -s AUTOTHROTTLE_ENABLED=True
  -s AUTOTHROTTLE_START_DELAY=1
  -s AUTOTHROTTLE_MAX_DELAY=3
  -s "AUTOTHROTTLE_TARGET_CONCURRENCY=${autothrottle_target_concurrency}"
  -s DOWNLOAD_TIMEOUT=30
  -s RETRY_TIMES=3
  -s COOKIES_ENABLED=False
  -s FEED_FORMAT=json
  -s "FEED_URI=${feed_file}"
  -s FEED_EXPORT_ENCODING=utf-8
  -s "LOG_FILE=${log_file}"
)

# Scrapy selects its settings module from the SCRAPY_SETTINGS_MODULE
# environment variable; passing `-s SETTINGS_MODULE=...` merely defines an
# ordinary setting key and does not load the module.
# NOTE(review): assumes fire_control_spider.settings_optimized is importable
# and defines the project settings (e.g. SPIDER_MODULES) — confirm.
#
# The `|| crawl_status=$?` form captures a failure without tripping `set -e`
# if the script runs with it enabled.
crawl_status=0
SCRAPY_SETTINGS_MODULE=fire_control_spider.settings_optimized \
  scrapy crawl cccf_site "${scrapy_args[@]}" || crawl_status=$?

# Do not report success (or exit 0) when the crawl itself failed.
if (( crawl_status != 0 )); then
  echo "❌ CCCF优化版爬虫运行失败 (退出码: ${crawl_status})" >&2
  echo "📝 日志文件: ${log_file}" >&2
  exit "${crawl_status}"
fi

echo "✅ CCCF优化版爬虫启动完成"
echo "📊 输出文件: ${output_dir}/"
echo "📝 日志文件: ${log_file}"
echo ""
echo "🔧 优化设置说明:"
echo "  - 并发数: ${concurrent_requests} (总) / ${concurrent_requests_per_domain} (每域名)"
echo "  - 下载延迟: ${download_delay}秒"
echo "  - AutoThrottle目标并发: ${autothrottle_target_concurrency}"
echo "  - 禁用cookies提高性能"